import sys
import os
import time
import numpy as np
import pandas as pd
import umap
# Read the project root once so the printed value and the inserted path agree.
nova_home = os.getenv('NOVA_HOME')
print('NOVA_HOME is at', nova_home)
if nova_home:
    # Make the project's packages importable from this notebook.
    sys.path.insert(1, nova_home)
else:
    # os.getenv returns None when the variable is unset; only strings belong
    # on sys.path, so skip the insert instead of silently adding None.
    print('WARNING: NOVA_HOME is not set; project imports may fail')
%load_ext autoreload
%autoreload 2
from src.common.utils import load_config_file
from src.embeddings.embeddings_utils import load_embeddings
from visualizations import *
from utils import summarize_times, merge_batches_by_key, correlate_columns
NOVA_HOME is at /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
## Load batch 9 umap1
# Data config for the batch-9 UMAP1 figure and the model output folder to read from.
config_path_data = 'manuscript/manuscript_figures_data_config_80pct/newNeuronsD8FigureConfig_UMAP1_B9'
output_folder_path = '/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen'

config_data = load_config_file(config_path_data, 'data')
# Point the loaded config at the same output folder that is passed to load_embeddings.
config_data.OUTPUTS_FOLDER = output_folder_path

embeddings, labels, paths = load_embeddings(output_folder_path, config_data)
# Keep only the text before the first underscore; labels without one are left as they are.
labels = [full_label if '_' not in full_label else full_label.split('_', 1)[0] for full_label in labels]
2025-07-31 11:23:05 INFO: [newNeuronsD8FigureConfig_UMAP1_B9] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/logs/310725_112305_355795_723989_galavir_sysdashboardsysjupyter.log; JOBID: 723989 Username: galavir) JOBNAME: sysdashboardsysjupyter 2025-07-31 11:23:05 INFO: [newNeuronsD8FigureConfig_UMAP1_B9] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input 2025-07-31 11:23:05 INFO: [load_embeddings] experiment_type = neuronsDay8_new_CLEAN 2025-07-31 11:23:05 INFO: [load_embeddings] input_folders = ['batch9'] 2025-07-31 11:23:05 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-07-31 11:23:34 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-07-31 11:23:42 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-07-31 11:23:46 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-07-31 11:23:48 INFO: [load_embeddings] embeddings shape: (196652, 192) 2025-07-31 11:23:48 INFO: [load_embeddings] labels shape: (196652,) 2025-07-31 11:23:48 INFO: [load_embeddings] example label: KIF5A_WT_Untreated 2025-07-31 11:23:48 INFO: [load_embeddings] paths shape: (196652,)
# Distance statistics for batch 9 computed with detailed:False.
df = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP1_B9_euclidean_detailed:False.csv')
# Reduce both label columns to the text before their first underscore.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
df
| label1 | label2 | block_size | total_pairs | dist_time_s | stats_time_s | p50 | |
|---|---|---|---|---|---|---|---|
| 0 | ANXA11 | ANXA11 | 3071 | 4713985 | 0.095507 | 0.010397 | 0.591702 |
| 1 | ANXA11 | CLTC | 3071 | 19565341 | 0.000964 | 0.002397 | 0.817852 |
| 2 | ANXA11 | Calreticulin | 3071 | 20403724 | 0.000759 | 0.002438 | 0.909104 |
| 3 | ANXA11 | DAPI | 3071 | 193595840 | 0.001228 | 0.022026 | 1.067759 |
| 4 | ANXA11 | DCP1A | 3071 | 20965717 | 0.000929 | 0.002503 | 0.886376 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 430 | TOMM20 | Tubulin | 5271 | 31457328 | 0.005623 | 0.003332 | 0.751270 |
| 431 | TOMM20 | mitotracker | 5271 | 27815067 | 0.000812 | 0.002985 | 0.817384 |
| 432 | Tubulin | Tubulin | 5968 | 17805528 | 0.021730 | 0.002307 | 0.506859 |
| 433 | Tubulin | mitotracker | 5968 | 31493136 | 0.000793 | 0.003267 | 0.796839 |
| 434 | mitotracker | mitotracker | 5277 | 13920726 | 0.018649 | 0.002018 | 0.512869 |
435 rows × 7 columns
# Print the timing summary for the table currently held in df.
summarize_times(df) ## Approximate run time = total dist_time_s + total stats_time_s
Max dist_time_s: 0.371s Total dist_time_s: 2.657s Mean dist_time_s: 0.006s Max stats_time_s: 0.236s Total stats_time_s:3.666s Mean stats_time: 0.008s
## Load distances calculated with more statistics
# Rebind df to the detailed:True table for the same batch/config.
df = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP1_B9_euclidean_detailed:True.csv')
# Reduce both label columns to the text before their first underscore.
for col in ('label1', 'label2'):
    df[col] = df[col].str.split(pat='_', n=1).str.get(0)
df ## When detailed:True more statistics are calculated
| label1 | label2 | block_size | total_pairs | dist_time_s | stats_time_s | p5 | p10 | p25 | p50 | p75 | p90 | p95 | lower_whisker | upper_whisker | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ANXA11 | ANXA11 | 3071 | 4713985 | 0.081689 | 0.062415 | 0.393329 | 0.429983 | 0.499990 | 0.591702 | 0.697070 | 0.798619 | 0.859730 | 0.204370 | 0.992690 |
| 1 | ANXA11 | CLTC | 3071 | 19565341 | 0.010012 | 0.471030 | 0.679525 | 0.707355 | 0.757179 | 0.817852 | 0.881633 | 0.939887 | 0.975436 | 0.570497 | 1.068314 |
| 2 | ANXA11 | Calreticulin | 3071 | 20403724 | 0.001989 | 0.528248 | 0.794656 | 0.821079 | 0.863632 | 0.909104 | 0.953718 | 0.995312 | 1.022292 | 0.728503 | 1.088847 |
| 3 | ANXA11 | DAPI | 3071 | 193595840 | 0.006846 | 4.444259 | 0.965059 | 0.987980 | 1.024955 | 1.067759 | 1.113970 | 1.157091 | 1.182280 | 0.891431 | 1.247494 |
| 4 | ANXA11 | DCP1A | 3071 | 20965717 | 0.008053 | 0.550683 | 0.729927 | 0.766814 | 0.826225 | 0.886376 | 0.944993 | 1.001384 | 1.038418 | 0.648073 | 1.123144 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 430 | TOMM20 | Tubulin | 5271 | 31457328 | 0.004007 | 0.768205 | 0.582171 | 0.619214 | 0.681871 | 0.751271 | 0.818071 | 0.874533 | 0.906778 | 0.477571 | 1.022371 |
| 431 | TOMM20 | mitotracker | 5271 | 27815067 | 0.001835 | 0.711481 | 0.681019 | 0.715796 | 0.766910 | 0.817384 | 0.865275 | 0.908640 | 0.935575 | 0.619364 | 1.012821 |
| 432 | Tubulin | Tubulin | 5968 | 17805528 | 0.039008 | 0.451457 | 0.344070 | 0.375719 | 0.434291 | 0.506859 | 0.583979 | 0.653445 | 0.694520 | 0.209758 | 0.808512 |
| 433 | Tubulin | mitotracker | 5968 | 31493136 | 0.001970 | 0.747374 | 0.616305 | 0.656065 | 0.722193 | 0.796839 | 0.872333 | 0.937772 | 0.974660 | 0.496982 | 1.097544 |
| 434 | mitotracker | mitotracker | 5277 | 13920726 | 0.037175 | 0.003469 | 0.331565 | 0.363687 | 0.426942 | 0.512869 | 0.612212 | 0.706330 | 0.763051 | 0.149039 | 0.890115 |
435 rows × 15 columns
# Print the timing summary for the detailed:True table held in df.
summarize_times(df) ## The run is much faster when the extra statistics are not calculated
Max dist_time_s: 0.572s Total dist_time_s: 3.804s Mean dist_time_s: 0.009s Max stats_time_s: 70.131s Total stats_time_s:456.615s Mean stats_time: 1.050s
# One boxplot per unique label, restricted to the pairs that involve it.
for label in np.unique(labels):
    print('Marker:', label)
    # Exact comparison instead of str.contains: contains() interprets the label
    # as a regular expression and matches substrings, so a label that is part
    # of another label (or holds regex metacharacters) would select wrong rows.
    involves_label = (df['label1'] == label) | (df['label2'] == label)
    plot_custom_boxplot(df[involves_label])
Marker: ANXA11
Marker: CLTC
Marker: Calreticulin
Marker: DAPI
Marker: DCP1A
Marker: FMRP
Marker: FUS
Marker: G3BP1
Marker: GM130
Marker: HNRNPA1
Marker: KIF5A
Marker: LAMP1
Marker: LSM14A
Marker: NCL
Marker: NEMO
Marker: NONO
Marker: PEX14
Marker: PML
Marker: PSD95
Marker: PURA
Marker: Phalloidin
Marker: SNCA
Marker: SON
Marker: SQSTM1
Marker: TDP43
Marker: TIA1
Marker: TOMM20
Marker: Tubulin
Marker: mitotracker
# Project the embeddings to 2 components with UMAP; random_state is fixed to 42.
reducer = umap.UMAP(n_components=2, random_state=42)
embeddings_umap = reducer.fit_transform(embeddings)
# Plot the 2-D projection together with the stripped labels.
plot_umap(embeddings_umap, labels)
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
# The 20 rows with the lowest p50, showing only the label pair and its p50.
df[['label1', 'label2', 'p50']].nsmallest(20, 'p50')
| label1 | label2 | p50 | |
|---|---|---|---|
| 299 | NCL | NCL | 0.373354 |
| 407 | SON | SON | 0.379395 |
| 84 | DAPI | DAPI | 0.418604 |
| 57 | Calreticulin | Calreticulin | 0.483635 |
| 225 | HNRNPA1 | HNRNPA1 | 0.487893 |
| 429 | TOMM20 | TOMM20 | 0.501046 |
| 135 | FMRP | FMRP | 0.503806 |
| 330 | NONO | NONO | 0.504985 |
| 369 | PSD95 | PSD95 | 0.505048 |
| 390 | Phalloidin | Phalloidin | 0.505826 |
| 432 | Tubulin | Tubulin | 0.506859 |
| 182 | G3BP1 | G3BP1 | 0.508532 |
| 414 | SQSTM1 | SQSTM1 | 0.509300 |
| 425 | TIA1 | TIA1 | 0.511997 |
| 434 | mitotracker | mitotracker | 0.512869 |
| 418 | SQSTM1 | Tubulin | 0.518675 |
| 315 | NEMO | NEMO | 0.522225 |
| 420 | TDP43 | TDP43 | 0.538871 |
| 357 | PML | PML | 0.539121 |
| 159 | FUS | FUS | 0.545235 |
# The 10 lowest-p50 rows among pairs whose two labels differ.
df[df['label1'].ne(df['label2'])][['label1', 'label2', 'p50']].nsmallest(10, 'p50')
| label1 | label2 | p50 | |
|---|---|---|---|
| 418 | SQSTM1 | Tubulin | 0.518675 |
| 393 | Phalloidin | SQSTM1 | 0.593089 |
| 83 | Calreticulin | mitotracker | 0.598340 |
| 397 | Phalloidin | Tubulin | 0.601287 |
| 308 | NCL | SON | 0.610600 |
| 396 | Phalloidin | TOMM20 | 0.635211 |
| 379 | PSD95 | mitotracker | 0.635387 |
| 194 | G3BP1 | PURA | 0.635628 |
| 137 | FMRP | G3BP1 | 0.636537 |
| 48 | CLTC | Phalloidin | 0.636593 |
# Up to 30 lowest-p50 rows among pairs whose two labels are the same.
df[df['label1'].eq(df['label2'])][['label1', 'label2', 'p50']].nsmallest(30, 'p50')
| label1 | label2 | p50 | |
|---|---|---|---|
| 299 | NCL | NCL | 0.373354 |
| 407 | SON | SON | 0.379395 |
| 84 | DAPI | DAPI | 0.418604 |
| 57 | Calreticulin | Calreticulin | 0.483635 |
| 225 | HNRNPA1 | HNRNPA1 | 0.487893 |
| 429 | TOMM20 | TOMM20 | 0.501046 |
| 135 | FMRP | FMRP | 0.503806 |
| 330 | NONO | NONO | 0.504985 |
| 369 | PSD95 | PSD95 | 0.505048 |
| 390 | Phalloidin | Phalloidin | 0.505826 |
| 432 | Tubulin | Tubulin | 0.506859 |
| 182 | G3BP1 | G3BP1 | 0.508532 |
| 414 | SQSTM1 | SQSTM1 | 0.509300 |
| 425 | TIA1 | TIA1 | 0.511997 |
| 434 | mitotracker | mitotracker | 0.512869 |
| 315 | NEMO | NEMO | 0.522225 |
| 420 | TDP43 | TDP43 | 0.538871 |
| 357 | PML | PML | 0.539121 |
| 159 | FUS | FUS | 0.545235 |
| 29 | CLTC | CLTC | 0.550498 |
| 245 | KIF5A | KIF5A | 0.553527 |
| 204 | GM130 | GM130 | 0.562313 |
| 0 | ANXA11 | ANXA11 | 0.591702 |
| 380 | PURA | PURA | 0.610373 |
| 344 | PEX14 | PEX14 | 0.610649 |
| 110 | DCP1A | DCP1A | 0.611440 |
| 264 | LAMP1 | LAMP1 | 0.628887 |
| 399 | SNCA | SNCA | 0.652723 |
| 282 | LSM14A | LSM14A | 0.695335 |
# The 10 highest-p50 rows among pairs whose two labels differ.
df[df['label1'].ne(df['label2'])][['label1', 'label2', 'p50']].nlargest(10, 'p50')
| label1 | label2 | p50 | |
|---|---|---|---|
| 108 | DAPI | Tubulin | 1.163382 |
| 104 | DAPI | SQSTM1 | 1.144320 |
| 101 | DAPI | Phalloidin | 1.143468 |
| 31 | CLTC | DAPI | 1.112972 |
| 323 | NEMO | SON | 1.103250 |
| 95 | DAPI | NEMO | 1.094177 |
| 300 | NCL | NEMO | 1.093336 |
| 67 | Calreticulin | LSM14A | 1.091403 |
| 187 | G3BP1 | LSM14A | 1.089859 |
| 288 | LSM14A | PSD95 | 1.089667 |
# Draw one histogram per embedding column, all on the same figure.
plt.figure()
n_dims = np.shape(embeddings)[1]
for dim in range(n_dims):
    plt.hist(embeddings[:, dim])
plt.show()
"""
Given two DataFrames with the same index and a distance column,
prints max and mean absolute difference between Euclid²/2 and cosine,
and shows a scatter plot comparing them.
"""
## df = distances calculated with the Euclidean metric
## dfe = distances calculated with the cosine metric
# dfe = pd.read_csv('./output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP1_B9_cosine.csv')
# compare_euclidean_cosine(df, dfe,'p50') ## Transform euclidean distances to get cosine distances
Max absolute difference: 1.21e-07 Mean absolute difference: 3.01e-08
# Overview plots for the table currently held in df.
plot_dist_histogram(df)
plot_label_clustermap(df, highlight_thresh=0.8, figsize=(9, 9))  ## Set threshold to highlight
plot_cluster_proximity_network(df, top_k=200, metric='p50')
# Load the detailed:False UMAP1 distance table of every batch that exists on
# disk, then merge them into one frame.
# NOTE: this rebinds the notebook-level name `df` to the last batch loaded.
batches = range(1,11)
dfs, names = [], []
for b in batches:
    path = (
        f"output_distances/"
        f"label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP1_B{b}_euclidean_detailed:False.csv"
    )
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing CSV is an expected, skippable condition. The previous
        # bare `except:` also hid malformed files, missing columns and even
        # KeyboardInterrupt behind the same message.
        print('batch', b, 'doesnt exists')
        continue
    # strip the "_WT_Untreated" (or everything after first "_")
    for col in ("label1","label2"):
        df[col] = df[col].str.split(pat='_', n=1).str[0]
    dfs.append(df)
    names.append(f"batch{b}")
merged_df, cols = merge_batches_by_key(dfs, names)
batch 4 doesnt exists batch 5 doesnt exists batch 6 doesnt exists
# Correlate the merged batch columns, first with Spearman and then with Pearson,
# drawing the correlation and p-value heatmaps for each method.
for method in ('spearman', 'pearson'):
    corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
    plot_correlation_heatmap(corr_df, method=method)
    plot_pval_heatmap(pval_df, labels=cols)
# 2) pairwise scatter/distribution grid over the per-batch p50 columns
sns.pairplot(merged_df.loc[:, [f"p50_{n}" for n in names]])
plt.suptitle("p50 distributions & relationships", y=1.02)
plt.show()
# One all-batches boxplot per unique label, restricted to the pairs that involve it.
for label in np.unique(labels):
    print('Marker:', label)
    # Exact comparison instead of str.contains: contains() interprets the label
    # as a regular expression and matches substrings, so a label that is part
    # of another label (or holds regex metacharacters) would select wrong rows.
    involves_label = (merged_df['label1'] == label) | (merged_df['label2'] == label)
    plot_boxplot_all_pairs(merged_df[involves_label])
Marker: ANXA11
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: CLTC
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot
Marker: Calreticulin
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: DAPI
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: DCP1A
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: FMRP
Marker: FUS
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: G3BP1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: GM130
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: HNRNPA1
Marker: KIF5A
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: LAMP1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: LSM14A
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: NCL
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: NEMO
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: NONO
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: PEX14
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: PML
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: PSD95
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: PURA
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: Phalloidin
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: SNCA
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: SON
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: SQSTM1
Marker: TDP43
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: TIA1
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: TOMM20
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: Tubulin
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Marker: mitotracker
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
Re‑applying per‑sample L₂ normalization after concatenation restores unit length, ensuring all pairwise Euclidean distances lie in [0, 2].
# Detailed distance statistics for the UMAP2 configuration of batch 9.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B9_euclidean_detailed:True.csv')
# Reduce both label columns to the text before their first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str.get(0)
summarize_times(df2)
Max dist_time_s: 0.061s Total dist_time_s: 0.192s Mean dist_time_s: 0.007s Max stats_time_s: 0.047s Total stats_time_s:0.094s Mean stats_time: 0.003s
# Label pairs ordered by ascending p50.
df2[['label1', 'label2', 'p50']].sort_values('p50')
| label1 | label2 | p50 | |
|---|---|---|---|
| 27 | WT | WT | 0.565994 |
| 0 | FUSHeterozygous | FUSHeterozygous | 0.567237 |
| 7 | FUSHomozygous | FUSHomozygous | 0.571463 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.578597 |
| 25 | TDP43 | TDP43 | 0.583789 |
| 13 | FUSRevertant | FUSRevertant | 0.585124 |
| 18 | OPTN | OPTN | 0.585979 |
| 17 | FUSRevertant | WT | 0.590413 |
| 22 | TBK1 | TBK1 | 0.592025 |
| 23 | TBK1 | TDP43 | 0.593684 |
| 26 | TDP43 | WT | 0.596437 |
| 6 | FUSHeterozygous | WT | 0.598882 |
| 24 | TBK1 | WT | 0.603014 |
| 12 | FUSHomozygous | WT | 0.603441 |
| 8 | FUSHomozygous | FUSRevertant | 0.604527 |
| 3 | FUSHeterozygous | OPTN | 0.604579 |
| 20 | OPTN | TDP43 | 0.604914 |
| 16 | FUSRevertant | TDP43 | 0.605965 |
| 2 | FUSHeterozygous | FUSRevertant | 0.606217 |
| 21 | OPTN | WT | 0.607683 |
| 19 | OPTN | TBK1 | 0.608434 |
| 15 | FUSRevertant | TBK1 | 0.608920 |
| 9 | FUSHomozygous | OPTN | 0.609757 |
| 14 | FUSRevertant | OPTN | 0.613143 |
| 5 | FUSHeterozygous | TDP43 | 0.614651 |
| 11 | FUSHomozygous | TDP43 | 0.615442 |
| 10 | FUSHomozygous | TBK1 | 0.617130 |
| 4 | FUSHeterozygous | TBK1 | 0.617523 |
# Boxplot over every label pair in df2.
plot_custom_boxplot(df2)
# Same batch/config as df2, read from the "_not_normalized" output file.
df21 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B9_euclidean_detailed:True_not_normalized.csv')
# Reduce both label columns to the text before their first underscore.
for col in ('label1', 'label2'):
    df21[col] = df21[col].str.split(pat='_', n=1).str.get(0)
plot_custom_boxplot(df21)
# Correlation between the p50 columns of df21 and df2 (Series.corr with its
# default method), aligned on the row index.
corr = df21['p50'].corr(df2['p50'])
print(corr)
0.9999999998979464
# Row-wise difference between the p50 values of df21 and df2.
(df21['p50'] - df2['p50']).values
array([2.4874258 , 2.53724295, 2.65835798, 2.65118074, 2.70794129,
2.69534588, 2.62619853, 2.50595671, 2.65095109, 2.67388564,
2.70621449, 2.69881535, 2.64619017, 2.56586331, 2.68873096,
2.67021579, 2.65725309, 2.58905828, 2.56961119, 2.66808558,
2.65265036, 2.66479182, 2.59612715, 2.60340238, 2.64431888,
2.56000942, 2.6154772 , 2.4819786 ])
# Show the row order of both tables when sorted by p50, to compare the rankings.
df2.sort_values('p50').index, df21.sort_values('p50').index,
(Index([27, 0, 7, 1, 25, 13, 18, 17, 22, 23, 26, 6, 24, 12, 8, 3, 20, 16,
2, 21, 19, 15, 9, 14, 5, 11, 10, 4],
dtype='int64'),
Index([27, 0, 7, 1, 25, 13, 18, 17, 22, 23, 26, 6, 24, 12, 8, 3, 20, 16,
2, 21, 19, 15, 9, 14, 5, 11, 10, 4],
dtype='int64'))
# One boxplot per label that appears in either label column of df2.
for label in np.unique(df2[['label1', 'label2']].values.ravel()):
    print('Marker:', label)
    # Exact comparison instead of str.contains: contains() interprets the label
    # as a regular expression and matches substrings, so a label that is part
    # of another label (or holds regex metacharacters) would select wrong rows.
    involves_label = (df2['label1'] == label) | (df2['label2'] == label)
    plot_custom_boxplot(df2[involves_label])
Marker: FUSHeterozygous
Marker: FUSHomozygous
Marker: FUSRevertant
Marker: OPTN
Marker: TBK1
Marker: TDP43
Marker: WT
plot_dist_histogram(df2)
# Highlight threshold = the largest p50 among same-label pairs.
plot_label_clustermap(
    df2,
    figsize=(6,6),
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
## Optional: Stretch the scale for visualization
# Min-max rescale p50 in place; the 1e-6 keeps the denominator non-zero.
# NOTE: this overwrites df2['p50'], so the plots below use the rescaled values.
df2['p50'] = df2['p50'].sub(df2['p50'].min()).div(df2['p50'].max() - df2['p50'].min() + 1e-6)
plot_dist_histogram(df2)
# Highlight threshold = the largest (rescaled) p50 among same-label pairs.
plot_label_clustermap(
    df2,
    figsize=(6,6),
    highlight_thresh=df2.loc[df2['label1'] == df2['label2'], 'p50'].max(),
)
# Threshold = the 90th percentile of the rescaled p50 values.
plot_cluster_proximity_network(df2, threshold=df2['p50'].quantile(0.9), figsize=(7,7))
## Another graph representation
# Threshold = the 90th percentile of df2['p50'].
# NOTE(review): method='k' presumably selects a Kamada-Kawai layout (the cell prints "kamada") - confirm in plot_network1.
plot_network1(df2, threshold = df2['p50'].quantile(0.9), figsize=(7,7), method = 'k')
kamada
# Load the detailed:True UMAP2 distance table of every batch that exists on
# disk, then merge them into one frame.
# NOTE: this rebinds the notebook-level name `df` to the last batch loaded.
batches = range(1,11)
dfs, names = [], []
for b in batches:
    path = (
        f"output_distances/"
        f"label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B{b}_euclidean_detailed:True.csv"
    )
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing CSV is an expected, skippable condition. The previous
        # bare `except:` also hid malformed files, missing columns and even
        # KeyboardInterrupt behind the same message.
        print('batch', b, 'doesnt exists')
        continue
    # strip the "_WT_Untreated" (or everything after first "_")
    for col in ("label1","label2"):
        df[col] = df[col].str.split(pat='_', n=1).str[0]
    dfs.append(df)
    names.append(f"batch{b}")
merged_df, cols = merge_batches_by_key(dfs, names)
batch 4 doesnt exists batch 5 doesnt exists batch 6 doesnt exists
# 1) heatmap of correlations
# Spearman correlation between the merged batch columns, followed by the
# matching p-value heatmap.
method = 'spearman'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
plot_pval_heatmap(pval_df, labels=cols)
## Sanity check
from scipy.stats import spearmanr

# Recompute the batch1-vs-batch2 Spearman correlation directly with SciPy so
# it can be compared with the corresponding heatmap entry.
x1, x2 = merged_df['p50_batch1'].values, merged_df['p50_batch2'].values
corr, pval = spearmanr(x1, x2)
print("Spearman correlation:", corr)
print("p-value:", pval)
Spearman correlation: 0.840175150519978 p-value: 2.2397505648403477e-08
# Repeat the correlation analysis of the `cols` columns with the Pearson
# method and plot both returned tables.
method = 'pearson'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
plot_pval_heatmap(pval_df, labels=cols)
# 2) pairplot of the p50 columns
# One "p50_<batch name>" column per batch that was loaded.
p50_columns = [f"p50_{n}" for n in names]
sns.pairplot(merged_df[p50_columns])
# y=1.02 places the title slightly above the top of the figure.
plt.suptitle("p50 distributions & relationships", y=1.02)
plt.show()
# Load the per-batch label-pair distance tables for batches 1-10, min-max
# rescale p50 within each batch, and merge them.
batches = range(1, 11)
dfs, names = [], []
for b in batches:
    path = (
        f"output_distances/"
        f"label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B{b}_euclidean_detailed:True.csv"
    )
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing CSV means "this batch was not computed". Any other
        # failure (malformed file, missing column, interrupt) must surface
        # instead of being reported as a missing batch.
        print('batch', b, 'doesnt exists')
        continue
    # strip the "_WT_Untreated" (or everything after first "_")
    for col in ("label1", "label2"):
        df[col] = df[col].str.split(pat='_', n=1).str[0]
    # Per-batch min-max rescale; the 1e-6 term keeps the denominator non-zero.
    df['p50'] = (df['p50'] - df['p50'].min()) / (df['p50'].max() - df['p50'].min() + 1e-6)
    dfs.append(df)
    names.append(f"batch{b}")
# dfs and names stay aligned: both only contain the batches that were found.
merged_df, cols = merge_batches_by_key(dfs, names)
batch 4 doesnt exists batch 5 doesnt exists batch 6 doesnt exists
# Spearman correlation of the `cols` columns of the current merged_df; only
# the correlation table is plotted here (pval_df is computed but not shown).
method = 'spearman'
corr_df, pval_df = correlate_columns(merged_df, cols, method=method)
plot_correlation_heatmap(corr_df, method=method)
# Batch 1: load the label-pair distance statistics.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B1_euclidean_detailed:True.csv')
# Keep only the part of each label that precedes the first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str[0]
# Display the label pairs in ascending p50 order.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
| | label1 | label2 | p50 |
|---|---|---|---|
| 22 | TBK1 | TBK1 | 0.539768 |
| 25 | TDP43 | TDP43 | 0.544963 |
| 7 | FUSHomozygous | FUSHomozygous | 0.545445 |
| 23 | TBK1 | TDP43 | 0.546216 |
| 0 | FUSHeterozygous | FUSHeterozygous | 0.553542 |
| 19 | OPTN | TBK1 | 0.554136 |
| 18 | OPTN | OPTN | 0.555501 |
| 27 | WT | WT | 0.556918 |
| 26 | TDP43 | WT | 0.558402 |
| 20 | OPTN | TDP43 | 0.560053 |
| 24 | TBK1 | WT | 0.560551 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.561452 |
| 13 | FUSRevertant | FUSRevertant | 0.567910 |
| 10 | FUSHomozygous | TBK1 | 0.571288 |
| 21 | OPTN | WT | 0.572287 |
| 11 | FUSHomozygous | TDP43 | 0.572550 |
| 14 | FUSRevertant | OPTN | 0.576138 |
| 4 | FUSHeterozygous | TBK1 | 0.578936 |
| 15 | FUSRevertant | TBK1 | 0.579242 |
| 3 | FUSHeterozygous | OPTN | 0.581632 |
| 5 | FUSHeterozygous | TDP43 | 0.583132 |
| 9 | FUSHomozygous | OPTN | 0.583896 |
| 16 | FUSRevertant | TDP43 | 0.586302 |
| 12 | FUSHomozygous | WT | 0.587277 |
| 2 | FUSHeterozygous | FUSRevertant | 0.590698 |
| 6 | FUSHeterozygous | WT | 0.591614 |
| 17 | FUSRevertant | WT | 0.592803 |
| 8 | FUSHomozygous | FUSRevertant | 0.609057 |
# The boxplot is drawn before p50 is rescaled.
plot_custom_boxplot(df2)
# Min-max rescale p50 in place; the 1e-6 term keeps the denominator non-zero.
p50_min, p50_max = df2['p50'].min(), df2['p50'].max()
df2['p50'] = (df2['p50'] - p50_min) / (p50_max - p50_min + 1e-6)
# The highlight threshold is the largest rescaled p50 among same-label rows.
same_label = df2['label1'] == df2['label2']
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[same_label, 'p50'].max(),
)
# The network threshold is the 90th percentile of the rescaled p50 values.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)
# Batch 2: load the label-pair distance statistics.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B2_euclidean_detailed:True.csv')
# Keep only the part of each label that precedes the first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str[0]
# Display the label pairs in ascending p50 order.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
| | label1 | label2 | p50 |
|---|---|---|---|
| 7 | FUSHomozygous | FUSHomozygous | 0.546937 |
| 0 | FUSHeterozygous | FUSHeterozygous | 0.548499 |
| 22 | TBK1 | TBK1 | 0.548950 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.555976 |
| 23 | TBK1 | TDP43 | 0.562944 |
| 27 | WT | WT | 0.563283 |
| 25 | TDP43 | TDP43 | 0.563887 |
| 24 | TBK1 | WT | 0.567673 |
| 15 | FUSRevertant | TBK1 | 0.569236 |
| 13 | FUSRevertant | FUSRevertant | 0.569553 |
| 18 | OPTN | OPTN | 0.569705 |
| 19 | OPTN | TBK1 | 0.570389 |
| 26 | TDP43 | WT | 0.572139 |
| 4 | FUSHeterozygous | TBK1 | 0.574397 |
| 21 | OPTN | WT | 0.575519 |
| 10 | FUSHomozygous | TBK1 | 0.577372 |
| 16 | FUSRevertant | TDP43 | 0.579854 |
| 20 | OPTN | TDP43 | 0.580375 |
| 3 | FUSHeterozygous | OPTN | 0.581884 |
| 11 | FUSHomozygous | TDP43 | 0.583450 |
| 17 | FUSRevertant | WT | 0.583860 |
| 6 | FUSHeterozygous | WT | 0.584609 |
| 5 | FUSHeterozygous | TDP43 | 0.585057 |
| 9 | FUSHomozygous | OPTN | 0.586790 |
| 2 | FUSHeterozygous | FUSRevertant | 0.587176 |
| 14 | FUSRevertant | OPTN | 0.588517 |
| 12 | FUSHomozygous | WT | 0.590301 |
| 8 | FUSHomozygous | FUSRevertant | 0.593408 |
# The boxplot is drawn before p50 is rescaled.
plot_custom_boxplot(df2)
# Min-max rescale p50 in place; the 1e-6 term keeps the denominator non-zero.
p50_min, p50_max = df2['p50'].min(), df2['p50'].max()
df2['p50'] = (df2['p50'] - p50_min) / (p50_max - p50_min + 1e-6)
# The highlight threshold is the largest rescaled p50 among same-label rows.
same_label = df2['label1'] == df2['label2']
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[same_label, 'p50'].max(),
)
# The network threshold is the 90th percentile of the rescaled p50 values.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)
# Batch 3: load the label-pair distance statistics.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B3_euclidean_detailed:True.csv')
# Keep only the part of each label that precedes the first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str[0]
# Display the label pairs in ascending p50 order.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
| | label1 | label2 | p50 |
|---|---|---|---|
| 7 | FUSHomozygous | FUSHomozygous | 0.537325 |
| 25 | TDP43 | TDP43 | 0.547847 |
| 22 | TBK1 | TBK1 | 0.550113 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.553182 |
| 0 | FUSHeterozygous | FUSHeterozygous | 0.553599 |
| 13 | FUSRevertant | FUSRevertant | 0.560934 |
| 19 | OPTN | TBK1 | 0.562634 |
| 18 | OPTN | OPTN | 0.562660 |
| 20 | OPTN | TDP43 | 0.563746 |
| 23 | TBK1 | TDP43 | 0.563829 |
| 15 | FUSRevertant | TBK1 | 0.568780 |
| 27 | WT | WT | 0.568916 |
| 26 | TDP43 | WT | 0.571280 |
| 24 | TBK1 | WT | 0.573646 |
| 11 | FUSHomozygous | TDP43 | 0.574541 |
| 16 | FUSRevertant | TDP43 | 0.574626 |
| 10 | FUSHomozygous | TBK1 | 0.574691 |
| 21 | OPTN | WT | 0.575098 |
| 14 | FUSRevertant | OPTN | 0.576178 |
| 4 | FUSHeterozygous | TBK1 | 0.578427 |
| 9 | FUSHomozygous | OPTN | 0.579640 |
| 12 | FUSHomozygous | WT | 0.583898 |
| 8 | FUSHomozygous | FUSRevertant | 0.585595 |
| 17 | FUSRevertant | WT | 0.587489 |
| 3 | FUSHeterozygous | OPTN | 0.587855 |
| 2 | FUSHeterozygous | FUSRevertant | 0.588360 |
| 5 | FUSHeterozygous | TDP43 | 0.589129 |
| 6 | FUSHeterozygous | WT | 0.591615 |
# The boxplot is drawn before p50 is rescaled.
plot_custom_boxplot(df2)
# Min-max rescale p50 in place; the 1e-6 term keeps the denominator non-zero.
p50_min, p50_max = df2['p50'].min(), df2['p50'].max()
df2['p50'] = (df2['p50'] - p50_min) / (p50_max - p50_min + 1e-6)
# The highlight threshold is the largest rescaled p50 among same-label rows.
same_label = df2['label1'] == df2['label2']
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[same_label, 'p50'].max(),
)
# The network threshold is the 90th percentile of the rescaled p50 values.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)
# Batch 7: load the label-pair distance statistics.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B7_euclidean_detailed:True.csv')
# Keep only the part of each label that precedes the first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str[0]
# Display the label pairs in ascending p50 order.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
| | label1 | label2 | p50 |
|---|---|---|---|
| 0 | FUSHeterozygous | FUSHeterozygous | 0.586453 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.595821 |
| 27 | WT | WT | 0.598045 |
| 7 | FUSHomozygous | FUSHomozygous | 0.600153 |
| 22 | TBK1 | TBK1 | 0.606335 |
| 25 | TDP43 | TDP43 | 0.607592 |
| 13 | FUSRevertant | FUSRevertant | 0.608848 |
| 18 | OPTN | OPTN | 0.610792 |
| 20 | OPTN | TDP43 | 0.614007 |
| 5 | FUSHeterozygous | TDP43 | 0.615811 |
| 23 | TBK1 | TDP43 | 0.615820 |
| 16 | FUSRevertant | TDP43 | 0.615921 |
| 6 | FUSHeterozygous | WT | 0.615967 |
| 14 | FUSRevertant | OPTN | 0.617581 |
| 26 | TDP43 | WT | 0.617753 |
| 2 | FUSHeterozygous | FUSRevertant | 0.618077 |
| 15 | FUSRevertant | TBK1 | 0.618106 |
| 11 | FUSHomozygous | TDP43 | 0.618327 |
| 17 | FUSRevertant | WT | 0.620087 |
| 10 | FUSHomozygous | TBK1 | 0.620390 |
| 12 | FUSHomozygous | WT | 0.620398 |
| 8 | FUSHomozygous | FUSRevertant | 0.621014 |
| 4 | FUSHeterozygous | TBK1 | 0.621732 |
| 24 | TBK1 | WT | 0.621907 |
| 19 | OPTN | TBK1 | 0.623820 |
| 21 | OPTN | WT | 0.624341 |
| 9 | FUSHomozygous | OPTN | 0.626320 |
| 3 | FUSHeterozygous | OPTN | 0.629806 |
# The boxplot is drawn before p50 is rescaled.
plot_custom_boxplot(df2)
# Min-max rescale p50 in place; the 1e-6 term keeps the denominator non-zero.
p50_min, p50_max = df2['p50'].min(), df2['p50'].max()
df2['p50'] = (df2['p50'] - p50_min) / (p50_max - p50_min + 1e-6)
# The highlight threshold is the largest rescaled p50 among same-label rows.
same_label = df2['label1'] == df2['label2']
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[same_label, 'p50'].max(),
)
# The network threshold is the 90th percentile of the rescaled p50 values.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)
# Batch 8: load the label-pair distance statistics.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B8_euclidean_detailed:True.csv')
# Keep only the part of each label that precedes the first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str[0]
# Display the label pairs in ascending p50 order.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
| | label1 | label2 | p50 |
|---|---|---|---|
| 0 | FUSHeterozygous | FUSHeterozygous | 0.565329 |
| 7 | FUSHomozygous | FUSHomozygous | 0.566815 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.575664 |
| 13 | FUSRevertant | FUSRevertant | 0.578966 |
| 27 | WT | WT | 0.581016 |
| 25 | TDP43 | TDP43 | 0.589651 |
| 16 | FUSRevertant | TDP43 | 0.593431 |
| 18 | OPTN | OPTN | 0.593571 |
| 14 | FUSRevertant | OPTN | 0.594822 |
| 20 | OPTN | TDP43 | 0.596580 |
| 22 | TBK1 | TBK1 | 0.596835 |
| 17 | FUSRevertant | WT | 0.599553 |
| 26 | TDP43 | WT | 0.600953 |
| 15 | FUSRevertant | TBK1 | 0.601802 |
| 23 | TBK1 | TDP43 | 0.602847 |
| 19 | OPTN | TBK1 | 0.605200 |
| 24 | TBK1 | WT | 0.605530 |
| 2 | FUSHeterozygous | FUSRevertant | 0.605901 |
| 21 | OPTN | WT | 0.606484 |
| 8 | FUSHomozygous | FUSRevertant | 0.607427 |
| 12 | FUSHomozygous | WT | 0.608657 |
| 5 | FUSHeterozygous | TDP43 | 0.609097 |
| 11 | FUSHomozygous | TDP43 | 0.611875 |
| 6 | FUSHeterozygous | WT | 0.612467 |
| 3 | FUSHeterozygous | OPTN | 0.615133 |
| 9 | FUSHomozygous | OPTN | 0.617688 |
| 10 | FUSHomozygous | TBK1 | 0.621561 |
| 4 | FUSHeterozygous | TBK1 | 0.622641 |
# The boxplot is drawn before p50 is rescaled.
plot_custom_boxplot(df2)
# Min-max rescale p50 in place; the 1e-6 term keeps the denominator non-zero.
p50_min, p50_max = df2['p50'].min(), df2['p50'].max()
df2['p50'] = (df2['p50'] - p50_min) / (p50_max - p50_min + 1e-6)
# The highlight threshold is the largest rescaled p50 among same-label rows.
same_label = df2['label1'] == df2['label2']
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[same_label, 'p50'].max(),
)
# The network threshold is the 90th percentile of the rescaled p50 values.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)
# Batch 10: load the label-pair distance statistics.
df2 = pd.read_csv('output_distances/label_pair_distances_stats_newNeuronsD8FigureConfig_UMAP2_B10_euclidean_detailed:True.csv')
# Keep only the part of each label that precedes the first underscore.
for col in ('label1', 'label2'):
    df2[col] = df2[col].str.split(pat='_', n=1).str[0]
# Display the label pairs in ascending p50 order.
df2.sort_values(by='p50').loc[:, ['label1', 'label2', 'p50']]
| | label1 | label2 | p50 |
|---|---|---|---|
| 27 | WT | WT | 0.566041 |
| 7 | FUSHomozygous | FUSHomozygous | 0.566043 |
| 25 | TDP43 | TDP43 | 0.573323 |
| 26 | TDP43 | WT | 0.575716 |
| 18 | OPTN | OPTN | 0.578197 |
| 0 | FUSHeterozygous | FUSHeterozygous | 0.584005 |
| 20 | OPTN | TDP43 | 0.584506 |
| 21 | OPTN | WT | 0.587898 |
| 22 | TBK1 | TBK1 | 0.600577 |
| 13 | FUSRevertant | FUSRevertant | 0.602216 |
| 19 | OPTN | TBK1 | 0.603291 |
| 23 | TBK1 | TDP43 | 0.606942 |
| 1 | FUSHeterozygous | FUSHomozygous | 0.607831 |
| 11 | FUSHomozygous | TDP43 | 0.609539 |
| 9 | FUSHomozygous | OPTN | 0.610879 |
| 12 | FUSHomozygous | WT | 0.611131 |
| 24 | TBK1 | WT | 0.615742 |
| 14 | FUSRevertant | OPTN | 0.617803 |
| 15 | FUSRevertant | TBK1 | 0.618171 |
| 10 | FUSHomozygous | TBK1 | 0.619661 |
| 2 | FUSHeterozygous | FUSRevertant | 0.623090 |
| 16 | FUSRevertant | TDP43 | 0.633048 |
| 8 | FUSHomozygous | FUSRevertant | 0.633272 |
| 4 | FUSHeterozygous | TBK1 | 0.636378 |
| 17 | FUSRevertant | WT | 0.642617 |
| 3 | FUSHeterozygous | OPTN | 0.644534 |
| 5 | FUSHeterozygous | TDP43 | 0.659667 |
| 6 | FUSHeterozygous | WT | 0.669677 |
plot_custom_boxplot(df2)
# NOTE(review): the min-max rescaling applied to the other batches is disabled
# here, so the plots below use the raw p50 values - confirm this is intended.
# df2['p50'] = (df2['p50'] - df2['p50'].min()) / (df2['p50'].max() - df2['p50'].min() + 1e-6)
# The highlight threshold is the largest p50 among same-label rows.
same_label = df2['label1'] == df2['label2']
plot_label_clustermap(
    df2,
    figsize=(6, 6),
    highlight_thresh=df2.loc[same_label, 'p50'].max(),
)
# The network threshold is the 90th percentile of p50.
plot_cluster_proximity_network(
    df2,
    threshold=df2['p50'].quantile(0.9),
    figsize=(7, 7),
)
# Boxplot over every row of the merged per-batch table, same-label pairs included.
plot_boxplot_all_pairs(merged_df)
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:472: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. # Plot /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. )
# Same boxplot, restricted to rows whose two labels differ.
different_labels = merged_df['label1'] != merged_df['label2']
plot_boxplot_all_pairs(merged_df[different_labels])
/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:480: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect. sns.boxplot( /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA/tools/examine_measures/visualizations.py:488: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_xticklabels([
# Plot the p50 values of the merged table across the loaded batches.
plot_p50_across_batches(merged_df)
# Relate every inter-label p50 to the intra-label p50 of its two labels and
# plot the resulting normalized metrics.
# NOTE: summary_df is an alias of df2 (no copy), so the two intra_p50_* columns
# added below also appear on df2.
summary_df = df2

# 1. compute intra-group medians: p50 of each same-label row, indexed by label
intra = (
    summary_df
    .loc[summary_df['label1'] == summary_df['label2'], ['label1', 'p50']]
    .set_index('label1')['p50']
)
summary_df['intra_p50_1'] = summary_df['label1'].map(intra)
summary_df['intra_p50_2'] = summary_df['label2'].map(intra)

# 2. filter to inter-group pairs (copy, so the new columns stay off summary_df)
df_inter = summary_df[summary_df['label1'] != summary_df['label2']].copy()

# 3. compute baselines: arithmetic and geometric mean of the two intra values
df_inter['baseline_arith'] = (df_inter['intra_p50_1'] + df_inter['intra_p50_2']) / 2
df_inter['baseline_geo'] = np.sqrt(df_inter['intra_p50_1'] * df_inter['intra_p50_2'])

# 4. compute normalized metrics
d = df_inter['p50']
df_inter['ratio_arith'] = d / df_inter['baseline_arith']
df_inter['ratio_geo'] = d / df_inter['baseline_geo']
df_inter['pct_diff'] = (d - df_inter['baseline_arith']) / df_inter['baseline_arith']
df_inter['silhouette_score'] = (d - df_inter['baseline_arith']) / np.maximum(d, df_inter['baseline_arith'])

# 5. plot each metric as a bar chart
metrics = ['ratio_arith', 'ratio_geo', 'pct_diff', 'silhouette_score']
# Named pair_labels so that the notebook-level `labels` returned by
# load_embeddings is not overwritten by this cell.
pair_labels = df_inter['label1'] + ' vs ' + df_inter['label2']
for m in metrics:
    fig, ax = plt.subplots(figsize=(max(8, len(df_inter)*0.3), 6))
    ax.bar(range(len(df_inter)), df_inter[m])
    ax.set_xticks(range(len(df_inter)))
    ax.set_xticklabels(pair_labels, rotation=90, fontsize=8)
    ax.set_ylabel(m)
    ymin, ymax = df_inter[m].min(), df_inter[m].max()
    # Pad 5% outwards on each side. Multiplying by 0.95 / 1.05 moves a
    # negative limit inwards and clips the bars (pct_diff and silhouette_score
    # can be negative); for positive limits this is identical to the scaling.
    ax.set_ylim(ymin - 0.05 * abs(ymin), ymax + 0.05 * abs(ymax))
    ax.set_title(f"{m.replace('_',' ').title()} by Label Pair")
    ax.grid(axis="y", linestyle="--", alpha=0.4)
    plt.tight_layout()
    plt.show()